{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.00572624,  1.4012513 ,  0.57999897, -0.42973134, -0.00662858,\n",
       "       -0.13137841,  0.        ,  0.        ], dtype=float32)"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import gym\n",
    "# 定义环境\n",
    "class MyWrapper(gym.Wrapper):\n",
    "    def __init__(self):\n",
    "        env = gym.make('LunarLander-v2',render_mode='rgb_array')\n",
    "        super().__init__(env)\n",
    "        self.env = env\n",
    "\n",
    "    def reset(self):\n",
    "        state,_ = self.env.reset()\n",
    "        return state\n",
    "    def step(self,action):\n",
    "        state,reward,done,_,info = self.env.step(action)\n",
    "        return state,reward,done,info\n",
    "\n",
    "env = MyWrapper()\n",
    "env.reset()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from matplotlib import pyplot as plt\n",
    "\n",
    "%matplotlib inline\n",
    "\n",
    "\n",
    "#打印游戏\n",
    "def show():\n",
    "    plt.imshow(env.render())\n",
    "    plt.show()\n",
    "\n",
    "show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 状态空间\n",
    "print(env.observation_space)\n",
    "# 一共8个数字，表示这个游戏的状态"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#动作空间\n",
    "print(env.action_space)\n",
    "#4个动作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(env.action_space.sample())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "env.step(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using cuda device\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<stable_baselines3.ppo.ppo.PPO at 0x18bef5d2490>"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from stable_baselines3.common.env_util import make_vec_env\n",
    "from stable_baselines3 import PPO\n",
    "\n",
    "# 初始化模型\n",
    "model = PPO(\n",
    "    policy=\"MlpPolicy\",\n",
    "    env=make_vec_env(MyWrapper, n_envs=4),\n",
    "    n_steps=1024,\n",
    "    batch_size=64,\n",
    "    n_epochs=4,\n",
    "    gamma=0.999,\n",
    "    gae_lambda=0.98,\n",
    "    ent_coef=0.01,\n",
    "    verbose=1,\n",
    ")\n",
    "model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(-129.48746221564943, 0.0)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from stable_baselines3.common.evaluation import evaluate_policy\n",
    "#测试\n",
    "evaluate_policy(model,env,n_eval_episodes=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#训练\n",
    "model.learn(total_timesteps=20_000)\n",
    "model.save(r'models/ppo_LunarLander-v2')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(-84.07420167062519, 217.08942709934294)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#测试\n",
    "model = PPO.load('models/ppo_LunarLander-v2.zip')\n",
    "evaluate_policy(model,env,n_eval_episodes=10)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Gym",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
